knitr::opts_chunk$set(echo = TRUE)

packages_needed <- c("ggplot2", # graphics
                     "arm", # display() etc.
                     "ggfortify")
pk_to_install <- packages_needed [!( packages_needed %in% rownames(installed.packages())  )]
if(length(pk_to_install)>0 ){
  install.packages(pk_to_install,repos="http://cran.r-project.org")
}


library(ggplot2)
library(arm)
## Loading required package: MASS
## Loading required package: Matrix
## Loading required package: lme4
## 
## arm (Version 1.11-2, built: 2020-7-27)
## Working directory is D:/Documents/Accademics/APSU/AdvancedData_Class/2021_09-14_AdvancedData_GLM_Assignment
library(ggfortify)
Heloderma_Records <- read.csv("data/Heloderma_Records.csv")
Vernal_Pool_Database <- read.csv("data/Mass_Pool_Table.csv")

Gila_Data <- Heloderma_Records[Heloderma_Records$Species == "suspectum",]
Gila_Data <- Gila_Data[is.na(Gila_Data$HW) == FALSE,]
Gila_Data <- subset(Gila_Data, Sex == "M" | Sex == "F")
Gila_Data <- Gila_Data[Gila_Data$HW != 0,]
Gila_Data <- Gila_Data[Gila_Data$SVL > 220,]

for(i in 1:dim(Gila_Data)[1]){
  Gila_Data$Bi_Sex[i] <- ifelse(Gila_Data$Sex[i] == "F", 0, 1)}

head(Gila_Data)
##     Source    picture Catalog. Lizard_ID   Species Sex Status Collection_Date
## 156    ASU ASU194.JPG      194    ASU194 suspectum   M             12/28/1953
## 157    ASU ASU195.JPG      195    ASU195 suspectum   F               6/3/1954
## 158    ASU ASU196.JPG      196    ASU196 suspectum   F              4/29/1953
## 160    ASU ASU706.JPG      706    ASU706 suspectum   F              4/17/1956
## 161    ASU ASU908.JPG      908    ASU908 suspectum   M               4/5/1955
## 162    ASU ASU910.JPG      910    ASU910 suspectum   F              8/26/1955
##        collector                         Locality Collection_Notes
## 156 H.H. ROWLEYS                   ROOSEVELT LAKE         IN CHURN
## 157 R. LATTIMORE                     SE OF APACHE         IN CHURN
## 158   R. PRESTON BETWEEN FLORENCE AND QUEEN CREEK         IN CHURN
## 160                     CAMELBACK MT., SCOTTSDALE         IN CHURN
## 161                                       BUCKEYE         IN CHURN
## 162                                   CASA GRANDE         IN CHURN
##                                                      Dissection_Notes Country
## 156                                                                       USA
## 157                                   rt ovid = 4 bb sized follicles      USA
## 158                                                                       USA
## 160 lt ovid = 11.4, 10.1, 3peas; rt ovid = 6.3, 9.0, 11.8, and 2 peas     USA
## 161                                                 hemipenes everted     USA
## 162                        3 full term eggs (pics), 36.8, 34.9, 33.2      USA
##     Latitude Longitude   State elevation   County spatial_accuracy Study
## 156 33.66378 -111.1219 Arizona       673     Gila               20      
## 157       NA        NA Arizona        NA Maricopa               NA      
## 158 33.15537 -111.5181 Arizona       461 Maricopa               20      
## 160 33.51444 -111.9611 Arizona       767 Maricopa                5      
## 161 33.59174 -112.6895 Arizona       435 Maricopa               10      
## 162 32.87905 -111.7565 Arizona       424    Pinal                5      
##         georef_verif SVL insert_elbow Tail elbow_wrist Tail.Diam wrist_3rd   HL
## 156              Yes 314         20.9    0        22.9      29.3      29.6 54.5
## 157 ref not possible 285         20.6  130        23.9      23.0      29.6 46.4
## 158              Yes 314         17.9  145        21.8      22.1      29.9 52.2
## 160              Yes 279         18.1  129        23.2      24.1      29.7 46.3
## 161              Yes 283         19.4  158        25.8      29.3      29.5 49.6
## 162              Yes 311         19.8  135        29.6      29.0      32.4 50.6
##     trunk   HW Tail.Circum HL_ear_rostral Mass_grams  X Bi_Sex
## 156   178 46.7          NA           61.9            NA      1
## 157   160 44.3          NA           54.0            NA      0
## 158   188 41.8          NA           56.3            NA      0
## 160   175 40.0          NA           51.0            NA      0
## 161   163 45.7          NA           53.0            NA      1
## 162   187 47.2          NA           56.9            NA      0

Gila_Sex_v_HW_BinomialModel <- glm(Bi_Sex~HW, data=Gila_Data, binomial(link="logit"))

ggplot(Gila_Data,aes(HW,Bi_Sex)) +
  geom_point() + 
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  geom_hline(yintercept = mean(Gila_Data$Bi_Sex), linetype = "dashed") +
  ylab("Likelyhood of Monster Being Male (1) or Female (0)") +
  xlab("Head Width (mm)") +
  labs(title="Binomial Sex v. Head Width Graph")
## `geom_smooth()` using formula 'y ~ x'

display(Gila_Sex_v_HW_BinomialModel)
## glm(formula = Bi_Sex ~ HW, family = binomial(link = "logit"), 
##     data = Gila_Data)
##             coef.est coef.se
## (Intercept) -7.23     1.29  
## HW           0.17     0.03  
## ---
##   n = 290, k = 2
##   residual deviance = 360.8, null deviance = 400.4 (difference = 39.5)
x <- predict(Gila_Sex_v_HW_BinomialModel)
y <- resid(Gila_Sex_v_HW_BinomialModel)
binnedplot(x, y)

coef(Gila_Sex_v_HW_BinomialModel)
## (Intercept)          HW 
##  -7.2330379   0.1670862
confint(Gila_Sex_v_HW_BinomialModel)
## Waiting for profiling to be done...
##                 2.5 %     97.5 %
## (Intercept) -9.856486 -4.7863914
## HW           0.111852  0.2264593
summary(Gila_Sex_v_HW_BinomialModel)
## 
## Call:
## glm(formula = Bi_Sex ~ HW, family = binomial(link = "logit"), 
##     data = Gila_Data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7391  -1.0857   0.5638   1.0070   1.8603  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -7.23304    1.29012  -5.606 2.06e-08 ***
## HW           0.16709    0.02916   5.729 1.01e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 400.35  on 289  degrees of freedom
## Residual deviance: 360.85  on 288  degrees of freedom
## AIC: 364.85
## 
## Number of Fisher Scoring iterations: 4

Vernal_Pool_Database_nonNA <- subset(Vernal_Pool_Database, is.na(Max_RSYL) == FALSE)

hist(Vernal_Pool_Database_nonNA$Max_AMAC, breaks = 50, col = "lightgoldenrod1", xlab = "A. maculatum Abundance", main = "Spotted Salamander Abundance Histogram")

hist(Vernal_Pool_Database_nonNA$Max_RSYL, breaks = 50, col = "saddlebrown", xlab = "L. sylvaticus Abundance", main = "Wood Frog Abundance Histogram")

hist(Vernal_Pool_Database_nonNA$Max_Depth, breaks = 50, col = "turquoise4", xlab = "Max Pool Depth", main = "Pool Depth Histogram")

ggplot(Vernal_Pool_Database_nonNA, aes(x = Max_Depth, y = Max_AMAC)) +
  geom_point(shape = 21, color = "black", fill = "lightgoldenrod1", alpha = .8) +
  geom_smooth(method="glm", method.args=list(family="poisson"(link="log")), color = "black") +
  xlab("Pool Depth (cm)") +
  ylab("Obverved A. maculatum Occupying Pool") +
  labs(title="Spotted Salamander Abundance at Vernal Pool Based on Pool Depth")
## `geom_smooth()` using formula 'y ~ x'

AMAC.glm <- glm(Max_AMAC ~ Max_Depth, data = Vernal_Pool_Database_nonNA, family = poisson)

anova(AMAC.glm)
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: Max_AMAC
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                       4079     329106
## Max_Depth  1    65011      4078     264095
summary(AMAC.glm)
## 
## Call:
## glm(formula = Max_AMAC ~ Max_Depth, family = poisson, data = Vernal_Pool_Database_nonNA)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -35.089   -5.275   -4.378   -1.824   79.869  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 1.959e+00  5.877e-03   333.4   <2e-16 ***
## Max_Depth   1.981e-02  6.452e-05   307.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 329106  on 4079  degrees of freedom
## Residual deviance: 264095  on 4078  degrees of freedom
##   (51 observations deleted due to missingness)
## AIC: 272637
## 
## Number of Fisher Scoring iterations: 7
exp(1.959+.01981*100)
## [1] 51.4186

ggplot(Vernal_Pool_Database_nonNA, aes(x = Max_Depth, y = Max_RSYL)) +
  geom_point(shape = 24, color = "Black", fill = "saddlebrown", alpha = .8) +
  geom_smooth(method="glm", method.args=list(family="poisson"(link="log")), color = "black") +
  xlab("Pool Depth (cm)") +
  ylab("Obverved L. sylvaticus Occupying Pool") +
  labs(title="Wood Frog Abundance at Vernal Pool Based on Pool Depth")
## `geom_smooth()` using formula 'y ~ x'

RSYL.glm <- glm(Max_RSYL ~ Max_Depth, data = Vernal_Pool_Database_nonNA, family = poisson)

anova(RSYL.glm)
## Analysis of Deviance Table
## 
## Model: poisson, link: log
## 
## Response: Max_RSYL
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                       4079     329674
## Max_Depth  1    36950      4078     292724
summary(RSYL.glm)
## 
## Call:
## glm(formula = Max_RSYL ~ Max_Depth, family = poisson, data = Vernal_Pool_Database_nonNA)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -27.537   -5.741   -4.888   -2.134   84.276  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) 2.239e+00  5.763e-03   388.5   <2e-16 ***
## Max_Depth   1.608e-02  7.186e-05   223.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 329674  on 4079  degrees of freedom
## Residual deviance: 292724  on 4078  degrees of freedom
##   (51 observations deleted due to missingness)
## AIC: 301069
## 
## Number of Fisher Scoring iterations: 7
exp(2.239+.01608*100)
## [1] 46.8523